#Alexander F. Gazmararian
#afg2@princeton.edu

# load packages
library(tidyverse)
library(scales)
library(modelsummary)
library(gridExtra)
library(kableExtra)

# load functions
source("code/fun/savefig.r")
source("code/fun/book_theme.r")

# load base model specification
source("code/fun/model_spec.r")

#load variable names
source("code/fun/coefnames4tables.r")
source("code/fun/fix_txt.r")

# load data
# `g` is used as a global throughout this script: flip_industry() subsets it
# by `mastersample`, the regional section re-levels two of its columns, and
# estimate_regional_ols() uses it as the default `df` argument.
g <- readRDS("data/NatRegQual_Winter21.rds")

#create function to prepare data
# Reshape one ten-item industry battery from wide to long format.
#
# Reads the global `g`, keeps the rows whose `mastersample` is in
# `sample_subset`, and stacks the columns `<question>_1` ... `<question>_10`
# into one row per original row and item.
#
# question:      column-name prefix of the battery (e.g. "FutureJobs").
# sample_subset: values of `mastersample` to keep.
#
# Returns the long data with the added columns `question`, `industry` (the
# item number replaced by its industry label) and `value`; rows whose `value`
# is NA are dropped.
flip_industry <- function(question, sample_subset = c("National2", "National1")) {
  # Item number -> industry label; an item number not listed here maps to NA.
  industry_labels <- c(
    "1" = "Solar Energy",
    "2" = "Wind Energy",
    "3" = "Coal Mining",
    "4" = "Oil and Gas",
    "5" = "Healthcare",
    "6" = "Trucking",
    "7" = "Computer Programming",
    "8" = "Energy Efficiency",
    "9" = "Auto Manufacturing",
    "10" = "Environmental Cleanup"
  )
  g_sub <- subset(g, mastersample %in% sample_subset)
  # Namespace-qualified calls replace the former require(): a missing package
  # now fails right here instead of require() quietly returning FALSE.
  long <- tidyr::pivot_longer(g_sub, paste0(question, "_", 1:10))
  # "<question>_<item>" -> separate `question` and `industry` columns
  long <- tidyr::separate(long, name, into = c("question", "industry"), sep = "_")
  long$industry <- unname(industry_labels[long$industry])
  dplyr::filter(long, !is.na(value))
}

#specify plot theme
# Shared ggplot components for the industry bar charts. Adding this list to a
# ggplot adds every element in turn, so each figure only supplies its data,
# labels and industry order.
localecon_theme <- list(
  # bars filled by response category and rescaled to proportions
  geom_bar(mapping = aes(fill = value), position = "fill"),
  # greyscale fill palette
  scale_fill_grey(start = 0.8, end = 0.2),
  # proportion axis labelled as percentages, without padding
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)),
  # industries on the vertical axis
  coord_flip(),
  # base theme first; the theme() tweaks below modify it
  theme_minimal(base_size = 13),
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 9),
    legend.box.margin = margin(t = -25, b = 10, l = -50),
    plot.margin = margin(r = 25)
  ),
  # reverse the order of the legend keys
  guides(fill = guide_legend(reverse = TRUE))
)
# Create Figure 4.1 ----

## Future jobs
# Long-format FutureJobs items (default sample subset of flip_industry()).
fjobs_data <- flip_industry("FutureJobs")
#prep values
# Collapse the four likelihood responses into "Fire More" / "Hire More"; every
# other response keeps its own label. Labels that are not among the factor
# levels below become NA.
fjobs_data <-
  fjobs_data %>%
  mutate(
    value = case_when(
      value %in% c("Very Likely to Fire More", "Somewhat Likely to Fire More") ~ "Fire More",
      value %in% c("Somewhat Likely to Hire More", "Very Likely to Hire More") ~ "Hire More",
      # spelled out as TRUE: `T` is an ordinary variable that can be reassigned
      TRUE ~ as.character(value)
    ),
    value = factor(
      value,
      ordered = TRUE,
      levels = rev(c("No Jobs in This Industry", "Fire More", "Hire More"))
    )
  )
#check order
# Share of "Hire More" within each industry (the denominator is every row of
# that industry, NA values included), sorted ascending.
fjobs_order <-
  fjobs_data %>%
  group_by(industry, value) %>%
  count() %>%
  group_by(industry) %>%
  mutate(per = n / sum(n)) %>%
  filter(value == "Hire More") %>%
  arrange(per)
fjobs_order
# keep only the industry names, in that order, for the axis limits
fjobs_order <- fjobs_order$industry
#create plot
p.futurejobs <-
  fjobs_data %>%
  ggplot(aes(x=industry)) +
  localecon_theme +
  labs(x="",y="",fill="", title = "Future Jobs") +
  scale_x_discrete(limits = fjobs_order)
p.futurejobs

## Local jobs
# Collapse the five LocalJobs responses into three ordered categories.
# Responses that match none of the labels below become NA.
collapse_local_values <- function(df) {
  df %>%
    mutate(
      value = case_when(
        value %in% c("Almost All Outside Workers", "Mostly Outside Workers") ~ "More Outside Workers",
        value %in% c("Equally Local and Outside Workers") ~ "Equally Local and Outside Workers",
        value %in% c("Mostly Local Workers", "Almost All Local Workers") ~ "More Local Workers"
      ),
      value = factor(
        value,
        ordered = TRUE,
        levels = rev(c("More Outside Workers", "Equally Local and Outside Workers", "More Local Workers"))
      )
    )
}
# Share of "More Local Workers" within each industry (the denominator is every
# row of that industry, NA values included), sorted ascending.
local_worker_share <- function(df) {
  df %>%
    group_by(industry, value) %>%
    count() %>%
    group_by(industry) %>%
    mutate(per = n / sum(n)) %>%
    filter(value == "More Local Workers") %>%
    arrange(per)
}
#prepare values
local_data <- collapse_local_values(flip_industry("LocalJobs"))
#prepare order
local_order <- local_worker_share(local_data)
local_order
# keep only the industry names, in that order, for the axis limits
local_order <- local_order$industry
#compare to regional
# Same table for the two regional samples; shown for inspection only, the
# result is not stored.
local_worker_share(
  collapse_local_values(
    flip_industry(question = "LocalJobs", sample_subset = c("swparegion_nonurban", "gulf_coast_nonurban"))
  )
)
#create plot
p.localjobs <-
  local_data %>%
  ggplot(aes(x=industry)) +
  localecon_theme +
  labs(x="",y="",fill="", title = "Local Jobs") +
  scale_x_discrete(limits = local_order)
p.localjobs
##Combine future and local jobs data
# Arrange the two panels into one figure object (grid.arrange() also draws
# it), then pass that object to savefig().
p.futurelocal <- grid.arrange(grobs = list(p.futurejobs, p.localjobs))

savefig(
  p.futurelocal,
  "4.1_figure_futurelocaljobs",
  height = 5,
  filepath = "figures/"
)

# Create Figure 4.2 ----

## Temporariness
# Long-format TempJobs items with the four duration responses collapsed to
# two; any other response becomes NA. `value` stays a character column here
# (no explicit level order is set).
temp_data <-
  flip_industry("TempJobs") %>%
  mutate(
    value = case_when(
      value %in% c("Somewhat Long-Term", "Very Long-Term") ~ "Long-Term",
      value %in% c("Very Short-Term", "Somewhat Short-Term") ~ "Short-Term"
    )
  )
#create industry order
# Industries sorted by their ascending share of "Long-Term" responses (the
# denominator is every row of that industry, NA values included).
temp_order <-
  temp_data %>%
  count(industry, value) %>%
  group_by(industry) %>%
  mutate(per = n / sum(n)) %>%
  filter(value == "Long-Term") %>%
  arrange(per) %>%
  pull(industry)
#create plot
p.tempjobs <-
  ggplot(temp_data, aes(x = industry)) +
  localecon_theme +
  scale_x_discrete(limits = temp_order) +
  labs(title = "Temporariness", x = "", y = "", fill = "")
p.tempjobs

##Raise
# Long-format Raises items with the five responses collapsed to three ordered
# categories; any other response becomes NA.
raises_data <-
  flip_industry("Raises") %>%
  mutate(
    value = case_when(
      value %in% c("Large Pay Cuts", "Small Pay Cuts") ~ "Pay Cuts",
      value %in% c("No Raises") ~ "No Raises",
      value %in% c("Large Pay Raises", "Small Pay Raises") ~ "Pay Raises"
    ),
    value = factor(
      value,
      levels = c("Pay Raises", "No Raises", "Pay Cuts"),
      ordered = TRUE
    )
  )
# Industries sorted by their ascending share of "Pay Raises" responses (the
# denominator is every row of that industry, NA values included).
raises_order <-
  raises_data %>%
  count(industry, value) %>%
  group_by(industry) %>%
  mutate(per = n / sum(n)) %>%
  filter(value == "Pay Raises") %>%
  arrange(per) %>%
  pull(industry)

#create plot
p.raises <-
  ggplot(raises_data, aes(x = industry)) +
  localecon_theme +
  scale_x_discrete(limits = raises_order) +
  labs(title = "Raises", x = "", y = "", fill = "")
p.raises

##Combine temporariness and raises
# Arrange the two panels into one figure object (grid.arrange() also draws
# it), then pass that object to savefig().
p.tempraises <- grid.arrange(grobs = list(p.tempjobs, p.raises))

savefig(
  p.tempraises,
  "4.2_figure_tempraisesjobs",
  height = 5,
  filepath = "figures/"
)

# Regional analysis----

# Specify empirical model
# Base specification shared by all regional models. `y` is a placeholder
# column that estimate_regional_ols() fills with the selected outcome.
f.reg <- y ~
  age + Female + Black + Hispanic + income5 + ffemploy + CollegeDegree +
  PartySummary + employfull + gw_index_all + mastersample_rural

#set reference categories
# Convert both sample indicators to factors with "National" as the baseline.
g$MasterSampleSlim <- g$MasterSampleSlim %>%
  factor() %>%
  relevel(ref = "National")
g$mastersample_rural <- g$mastersample_rural %>%
  factor() %>%
  relevel(ref = "National")

#create function to estimate models
# Fit the three OLS specifications for one outcome.
#
# outcome: name of the column in `df` to use as the dependent variable.
# df:      data to estimate on; defaults to the global `g`.
#
# Returns an unnamed list of three `lm` fits:
#   1. the base specification `f.reg`;
#   2. base + scale()d coal_emp_ctyzip, oilgas_emp_ctyzip, totalMW_Solar and
#      totalMW_Wind;
#   3. base + totalfuturejobs_slim_sc when `outcome` contains "future"
#      (case-insensitive), otherwise base + totallocaljobs_slim_sc.
estimate_regional_ols <- function(outcome, df = g) {
  # Fail early: with a missing column `y` would not exist in `df`, and lm()
  # would then look `y` up in the formula's environment instead.
  if (length(outcome) != 1L || (is.character(outcome) && !outcome %in% names(df))) {
    stop("`outcome` must identify exactly one column of `df`", call. = FALSE)
  }
  # `[[` always extracts the column itself; `df[, outcome]` yields a one-column
  # tibble when `df` is a tibble, which lm() cannot use as a response.
  df$y <- df[[outcome]]

  #add objective data
  f.reg.objective <- update(
    f.reg,
    ~ . + scale(coal_emp_ctyzip) + scale(oilgas_emp_ctyzip) + scale(totalMW_Solar) + scale(totalMW_Wind)
  )

  #add optimism
  f.reg.optimism <- if (grepl("future", outcome, ignore.case = TRUE)) {
    update(f.reg, ~ . + totalfuturejobs_slim_sc)
  } else {
    update(f.reg, ~ . + totallocaljobs_slim_sc)
  }

  list(
    lm(f.reg, data = df),
    lm(f.reg.objective, data = df),
    lm(f.reg.optimism, data = df)
  )
}

#function to create tables
# Write a LaTeX regression table for two outcomes to
# "tables/ch4/ols_<name>.txt" and post-process the file with fix_txt().
#
# models:   list of fitted models. The spanning header below covers one label
#           column plus three columns per outcome, so six models are expected.
# name:     suffix of the output file name.
# title:    optional table title, forwarded to modelsummary(); when omitted,
#           NULL is passed (modelsummary's default, per its documentation).
# outcome1: header printed above the first three model columns.
# outcome2: header printed above the last three model columns.
#
# Returns whatever fix_txt() returns.
make_table <- function(models, name, title, outcome1, outcome2) {
  # `title` has no default and existing callers omit it, so test with missing()
  table_title <- if (missing(title)) NULL else title

  # named vector: header label -> number of columns it spans
  colsize <- stats::setNames(c(1, 3, 3), c(" ", outcome1, outcome2))
  file_out <- paste0("tables/ch4/ols_", name, ".txt")

  latex_table <- modelsummary(
    models,
    stars = c("*"=.1,"**"=.05,"***"=.01),
    vcov = "HC2",
    coef_map = coefnames,
    gof_map = c("nobs", "adj.r.squared"),
    title = table_title,
    escape=FALSE,
    output="latex"
  )
  latex_table <- add_header_above(latex_table, colsize)
  cat(latex_table, file = file_out)
  fix_txt(file_out)
}

# Specify outcomes
dv <- c("FutureJobs_Wind", "FutureJobs_Solar", "FutureJobs_Oil", "FutureJobs_Coal", "LocalJobs_Wind", "LocalJobs_Solar", "LocalJobs_Oil", "LocalJobs_Coal",
        "FutureJobs_Eff", "FutureJobs_Clean", "LocalJobs_Eff", "LocalJobs_Clean")

# Loop over models
# Each element is the list of three fits returned by estimate_regional_ols().
# Results are named by outcome so that the tables below select models by name
# rather than by their position in `dv`; positional indexing still works.
reg.ols <- lapply(dv, estimate_regional_ols)
names(reg.ols) <- dv

#create table for future jobs
make_table(
  models = c(reg.ols[["FutureJobs_Wind"]], reg.ols[["FutureJobs_Solar"]]),
  name = "futurejobs_green", outcome1 = "Wind:", outcome2 = "Solar:"
)
make_table(
  models = c(reg.ols[["FutureJobs_Oil"]], reg.ols[["FutureJobs_Coal"]]),
  name = "futurejobs_oilcoal", outcome1 = "Oil and Gas:", outcome2 = "Coal:"
)
make_table(
  models = c(reg.ols[["FutureJobs_Eff"]], reg.ols[["FutureJobs_Clean"]]),
  name = "futurejobs_effclean", outcome1 = "Energy Efficiency:", outcome2 = "Environmental Cleanup:"
)

#create table for local jobs
make_table(
  models = c(reg.ols[["LocalJobs_Wind"]], reg.ols[["LocalJobs_Solar"]]),
  name = "localjobs_green", outcome1 = "Wind:", outcome2 = "Solar:"
)
make_table(
  models = c(reg.ols[["LocalJobs_Oil"]], reg.ols[["LocalJobs_Coal"]]),
  name = "localjobs_oilcoal", outcome1 = "Oil and Gas:", outcome2 = "Coal:"
)
make_table(
  models = c(reg.ols[["LocalJobs_Eff"]], reg.ols[["LocalJobs_Clean"]]),
  name = "localjobs_effclean", outcome1 = "Energy Efficiency:", outcome2 = "Environmental Cleanup:"
)

# Within-subject analysis----
#specify outcomes
within.df <- c("futurewindoildiff", "futuresolaroildiff", "localwindoildiff", "localsolaroildiff", "localsolarcoaldiff", "localwindcoaldiff",  "futurewindcoaldiff", "futuresolarcoaldiff",
               "localsolarhealthdiff", "localwindhealthdiff", "futurewindhealthdiff", "futuresolarhealthdiff")

#loop over models
# Each element is the list of three fits returned by estimate_regional_ols().
# Results are named by outcome and the tables below select them by name.
# `within.df` lists solar before wind for the local coal and local health
# outcomes, so picking positions 5-6 and 9-10 in order put the solar models
# under the "Wind:" header and the wind models under "Solar:". Selecting by
# name keeps every header aligned with its models.
within.ols <- lapply(within.df, estimate_regional_ols)
names(within.ols) <- within.df

#create table for future jobs
make_table(
  models = c(within.ols[["futurewindoildiff"]], within.ols[["futuresolaroildiff"]]),
  name = "within_future_windsolar_vs_oilgas", outcome1 = "Wind:", outcome2 = "Solar:"
)
make_table(
  models = c(within.ols[["futurewindcoaldiff"]], within.ols[["futuresolarcoaldiff"]]),
  name = "within_future_windsolar_vs_coal", outcome1 = "Wind:", outcome2 = "Solar:"
)
make_table(
  models = c(within.ols[["futurewindhealthdiff"]], within.ols[["futuresolarhealthdiff"]]),
  name = "within_future_windsolar_vs_health", outcome1 = "Wind:", outcome2 = "Solar:"
)

#create table for local jobs
make_table(
  models = c(within.ols[["localwindoildiff"]], within.ols[["localsolaroildiff"]]),
  name = "within_local_windsolar_vs_oilgas", outcome1 = "Wind:", outcome2 = "Solar:"
)
make_table(
  models = c(within.ols[["localwindcoaldiff"]], within.ols[["localsolarcoaldiff"]]),
  name = "within_local_windsolar_vs_coal", outcome1 = "Wind:", outcome2 = "Solar:"
)
make_table(
  models = c(within.ols[["localwindhealthdiff"]], within.ols[["localsolarhealthdiff"]]),
  name = "within_local_windsolar_vs_health", outcome1 = "Wind:", outcome2 = "Solar:"
)
